This project analyzes the nutritional contents of different cereals, including their carbohydrate, vitamin, sugar, and potassium contents.
OBJECTIVES:
1. Clean and thoroughly analyze the dataset.
2. Analyze some important trends in the dataset.
3. Answer data driven questions and showcase answers using charts.
#import libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# Load the cereal dataset from its CSV file and preview the first five rows.
# NOTE(review): this is an absolute path on one machine; adjust it when
# running the notebook elsewhere.
csv_path = r"C:\Users\AISHAT\Desktop\PROJECTS\Cognorise\archive (1)\cereal.csv"
data = pd.read_csv(csv_path)
data.head()
| name | mfr | type | calories | protein | fat | sodium | fiber | carbo | sugars | potass | vitamins | shelf | weight | cups | rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100% Bran | N | C | 70 | 4 | 1 | 130 | 10.0 | 5.0 | 6 | 280 | 25 | 3 | 1.0 | 0.33 | 68.40 |
| 1 | 100% Natural Bran | Q | C | 120 | 3 | 5 | 15 | 2.0 | 8.0 | 8 | 135 | 0 | 3 | 1.0 | 1.00 | 33.98 |
| 2 | All-Bran | K | C | 70 | 4 | 1 | 260 | 9.0 | 7.0 | 5 | 320 | 25 | 3 | 1.0 | 0.33 | 59.43 |
| 3 | All-Bran with Extra Fiber | K | C | 50 | 4 | 0 | 140 | 14.0 | 8.0 | 0 | 330 | 25 | 3 | 1.0 | 0.50 | 93.70 |
| 4 | Almond Delight | R | C | 110 | 2 | 2 | 200 | 1.0 | 14.0 | 8 | -1 | 25 | 3 | 1.0 | 0.75 | 34.38 |
# Count the missing (NaN/None) values in each column.
# NOTE(review): isnull() only detects NaN/None. The describe() summary shown
# further down reports a minimum of -1 for carbo, sugars and potass, which
# presumably marks missing measurements -- confirm against the dataset's
# documentation and handle those rows before drawing conclusions.
data.isnull().sum()
name 0 mfr 0 type 0 calories 0 protein 0 fat 0 sodium 0 fiber 0 carbo 0 sugars 0 potass 0 vitamins 0 shelf 0 weight 0 cups 0 rating 0 dtype: int64
# Count the rows that are exact duplicates of an earlier row.
data.duplicated().sum()
0
# Size of the dataset as (number of rows, number of columns).
data.shape
(77, 16)
# Data type of each column.
data.dtypes
name object mfr object type object calories int64 protein int64 fat int64 sodium int64 fiber float64 carbo float64 sugars int64 potass int64 vitamins int64 shelf int64 weight float64 cups float64 rating float64 dtype: object
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()
| calories | protein | fat | sodium | fiber | carbo | sugars | potass | vitamins | shelf | weight | cups | rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 |
| mean | 106.883117 | 2.545455 | 1.012987 | 159.675325 | 2.151948 | 14.597403 | 6.922078 | 96.077922 | 28.246753 | 2.207792 | 1.029610 | 0.821039 | 42.665325 |
| std | 19.484119 | 1.094790 | 1.006473 | 83.832295 | 2.383364 | 4.278956 | 4.444885 | 71.286813 | 22.342523 | 0.832524 | 0.150477 | 0.232716 | 14.047301 |
| min | 50.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | -1.000000 | -1.000000 | -1.000000 | 0.000000 | 1.000000 | 0.500000 | 0.250000 | 18.040000 |
| 25% | 100.000000 | 2.000000 | 0.000000 | 130.000000 | 1.000000 | 12.000000 | 3.000000 | 40.000000 | 25.000000 | 1.000000 | 1.000000 | 0.670000 | 33.170000 |
| 50% | 110.000000 | 3.000000 | 1.000000 | 180.000000 | 2.000000 | 14.000000 | 7.000000 | 90.000000 | 25.000000 | 2.000000 | 1.000000 | 0.750000 | 40.400000 |
| 75% | 110.000000 | 3.000000 | 2.000000 | 210.000000 | 3.000000 | 17.000000 | 11.000000 | 120.000000 | 25.000000 | 3.000000 | 1.000000 | 1.000000 | 50.830000 |
| max | 160.000000 | 6.000000 | 5.000000 | 320.000000 | 14.000000 | 23.000000 | 15.000000 | 330.000000 | 100.000000 | 3.000000 | 1.500000 | 1.500000 | 93.700000 |
# Scatter plot of every product's calorie count, one colour per product.
figure = px.scatter(
    data,
    x="name",
    y="calories",
    color="name",
    title="calories",
)
figure.show()
# The ten products with the highest calories: sort by calories in
# descending order and keep the first ten rows.
cal = (
    data
    .sort_values("calories", ascending=False)
    .iloc[:10]
)
cal
| name | mfr | type | calories | protein | fat | sodium | fiber | carbo | sugars | potass | vitamins | shelf | weight | cups | rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 46 | Mueslix Crispy Blend | K | C | 160 | 3 | 2 | 150 | 3.0 | 17.0 | 13 | 160 | 25 | 3 | 1.50 | 0.67 | 30.31 |
| 44 | Muesli Raisins; Dates; & Almonds | R | C | 150 | 4 | 3 | 95 | 3.0 | 16.0 | 11 | 170 | 25 | 3 | 1.00 | 1.00 | 37.14 |
| 45 | Muesli Raisins; Peaches; & Pecans | R | C | 150 | 4 | 3 | 150 | 3.0 | 16.0 | 11 | 170 | 25 | 3 | 1.00 | 1.00 | 34.14 |
| 70 | Total Raisin Bran | G | C | 140 | 3 | 1 | 190 | 4.0 | 15.0 | 14 | 230 | 100 | 3 | 1.50 | 1.00 | 28.59 |
| 39 | Just Right Fruit & Nut | K | C | 140 | 3 | 1 | 170 | 2.0 | 20.0 | 9 | 95 | 100 | 3 | 1.30 | 0.75 | 36.47 |
| 49 | Nutri-Grain Almond-Raisin | K | C | 140 | 3 | 2 | 220 | 3.0 | 21.0 | 7 | 130 | 25 | 3 | 1.33 | 0.67 | 40.69 |
| 51 | Oatmeal Raisin Crisp | G | C | 130 | 3 | 2 | 170 | 1.5 | 13.5 | 10 | 120 | 25 | 3 | 1.25 | 0.50 | 30.45 |
| 7 | Basic 4 | G | C | 130 | 3 | 2 | 210 | 2.0 | 18.0 | 8 | 100 | 25 | 3 | 1.33 | 0.75 | 37.04 |
| 52 | Post Nat. Raisin Bran | P | C | 120 | 3 | 1 | 200 | 6.0 | 11.0 | 14 | 260 | 25 | 3 | 1.33 | 0.67 | 37.84 |
| 35 | Honey Graham Ohs | Q | C | 120 | 1 | 2 | 220 | 1.0 | 12.0 | 11 | 45 | 25 | 2 | 1.00 | 1.00 | 21.87 |
# Chart of the products with the highest calories, coloured by manufacturer.
# Colouring by "mfr" draws one bar trace per manufacturer, so without an
# explicit order the x-axis follows the trace order (bars grouped by
# manufacturer) rather than the calorie ranking. category_orders pins the
# x-axis to the already-sorted order of `cal`.
figure = px.bar(
    data_frame=cal,
    x="name",
    y="calories",
    title="products with the highest calories",
    color="mfr",
    category_orders={"name": cal["name"].tolist()},
)
figure.show()
# The ten products with the lowest calories: sort ascending, keep ten rows.
l_cal = data.sort_values(by=["calories"], ascending=True).head(10)

# Bar chart coloured by manufacturer. Colouring by "mfr" draws one trace per
# manufacturer, which would otherwise reorder the x-axis by manufacturer;
# category_orders keeps the bars in the calorie-sorted order of `l_cal`.
figure = px.bar(
    data_frame=l_cal,
    x="name",
    y="calories",
    title="products with the lowest calories",
    color="mfr",
    category_orders={"name": l_cal["name"].tolist()},
)
figure.show()
# The ten products with the highest rating: sort by rating in descending
# order and keep the first ten rows.
rate = (
    data
    .sort_values("rating", ascending=False)
    .iloc[:10]
)
rate
| name | mfr | type | calories | protein | fat | sodium | fiber | carbo | sugars | potass | vitamins | shelf | weight | cups | rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | All-Bran with Extra Fiber | K | C | 50 | 4 | 0 | 140 | 14.0 | 8.0 | 0 | 330 | 25 | 3 | 1.00 | 0.50 | 93.70 |
| 64 | Shredded Wheat 'n'Bran | N | C | 90 | 3 | 0 | 0 | 4.0 | 19.0 | 0 | 140 | 0 | 1 | 1.00 | 0.67 | 74.47 |
| 65 | Shredded Wheat spoon size | N | C | 90 | 3 | 0 | 0 | 3.0 | 20.0 | 0 | 120 | 0 | 1 | 1.00 | 0.67 | 72.80 |
| 0 | 100% Bran | N | C | 70 | 4 | 1 | 130 | 10.0 | 5.0 | 6 | 280 | 25 | 3 | 1.00 | 0.33 | 68.40 |
| 63 | Shredded Wheat | N | C | 80 | 2 | 0 | 0 | 3.0 | 16.0 | 0 | 95 | 0 | 1 | 0.83 | 1.00 | 68.24 |
| 20 | Cream of Wheat (Quick) | N | H | 100 | 3 | 0 | 80 | 1.0 | 21.0 | 0 | -1 | 0 | 2 | 1.00 | 1.00 | 64.53 |
| 55 | Puffed Wheat | Q | C | 50 | 2 | 0 | 0 | 1.0 | 10.0 | 0 | 50 | 0 | 3 | 0.50 | 1.00 | 63.01 |
| 54 | Puffed Rice | Q | C | 50 | 1 | 0 | 0 | 0.0 | 13.0 | 0 | 15 | 0 | 3 | 0.50 | 1.00 | 60.76 |
| 50 | Nutri-grain Wheat | K | C | 90 | 3 | 0 | 170 | 3.0 | 18.0 | 2 | 90 | 25 | 3 | 1.00 | 1.00 | 59.64 |
| 2 | All-Bran | K | C | 70 | 4 | 1 | 260 | 9.0 | 7.0 | 5 | 320 | 25 | 3 | 1.00 | 0.33 | 59.43 |
# Chart of the products with the highest rating, coloured by manufacturer.
# Colouring by "mfr" draws one bar trace per manufacturer, so without an
# explicit order the x-axis follows the trace order (bars grouped by
# manufacturer) rather than the rating ranking. category_orders pins the
# x-axis to the already-sorted order of `rate`.
figure = px.bar(
    data_frame=rate,
    x="name",
    y="rating",
    title="products with the highest ratings",
    color="mfr",
    category_orders={"name": rate["name"].tolist()},
)
figure.show()
# The ten products with the lowest rating: sort ascending, keep ten rows.
l_rate = data.sort_values(by=["rating"], ascending=True).head(10)

# Bar chart coloured by manufacturer. Colouring by "mfr" draws one trace per
# manufacturer, which would otherwise reorder the x-axis by manufacturer;
# category_orders keeps the bars in the rating-sorted order of `l_rate`.
figure = px.bar(
    data_frame=l_rate,
    x="name",
    y="rating",
    title="products with the lowest ratings",
    color="mfr",
    category_orders={"name": l_rate["name"].tolist()},
)
figure.show()
# Histogram of the rating column across all products.
figure = px.histogram(
    data,
    x="rating",
    title="count of ratings",
)
figure.show()
# Pie chart with one sector per distinct value of the "weight" column.
# No `values` column is passed, so sector sizes presumably reflect how many
# products share each weight -- TODO confirm against the rendered chart.
# NOTE(review): color="name" assigns colours per product while the sectors
# are weight values, so the colours likely do not map cleanly onto the
# sectors -- consider colouring by "weight" or dropping `color`.
figure = px.pie(
    data,
    names="weight",
    color="name",
    title="%weights",
)
figure.show()